#=============================================================================#
#
# PROJECT:        Does Islamist Terrorism Still Affect Political Attitudes?
# AUTHORS:        ** anonymized for review **
# CONTACT:        ** anonymized for review **
# LAST MODIFIED:  August 28, 2025
# 
#=============================================================================#
#
# This R file contains the code used to test for publication bias
# 
#=============================================================================#

rm(list=ls())
getwd()

# Install and load all necessary packages with the ipak function:
# i.e., check to see if packages are installed. 
# Install them if they are not, 
# then load them into the R session.

ipak <- function(pkg){  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if(length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}
packages <- c("readxl", "metafor", "metaSEM", "ggplot2",
              "tidyverse", "dplyr", "xtable", "modelsummary", "stargazer") 
ipak(packages)


# Data ------------------------------------------------------------------------

df <- readRDS("data/df.rds") 
df <- df %>%
  mutate(StudyYear = StudyYear - 2001)
df <- df %>% 
  filter((ExactAttack_2 != "2002 Bali bombings" &  # attacks on western countries 
            ExactAttack_2 != "2008 Mumbai attacks" &  # attacks on western countries 
            ExactAttack_2 != "Gaza - Israel conflict") %>%
           replace_na(TRUE))


# Funnel plot------------------------------------------------------------------

# --- empty random-effects model 'm0' ---
m0 <- rma.mv(d, var_d, random = ~ 1 | ID_R/ID_ES_Unique, data = df)

# --- study-level data ---
dat <- data.frame(yi = m0$yi, sei = sqrt(m0$vi))

# --- pooled estimate (center line) ---
b0 <- as.numeric(coef(m0)[1])

# --- z values for pseudo-CIs ---
z90 <- qnorm(0.95)    # 90% two-sided
z95 <- qnorm(0.975)   # 95% two-sided
z99 <- qnorm(0.995)   # 99% two-sided

se_max <- max(dat$sei, na.rm = TRUE)
se_seq <- seq(0, se_max, length.out = 300)

# Build one frame with all bounds
rgn <- data.frame(
  se   = se_seq,
  lo90 = b0 - z90*se_seq, hi90 = b0 + z90*se_seq,
  lo95 = b0 - z95*se_seq, hi95 = b0 + z95*se_seq,
  lo99 = b0 - z99*se_seq, hi99 = b0 + z99*se_seq
)

# --- plot ---
p <- ggplot() +
  # Draw OUTSIDE 99% first (two ribbons: below lo99 and above hi99)
  geom_ribbon(data = rgn, aes(x = se, ymin = -Inf,  ymax = lo99, fill = "Outside 99%")) +
  geom_ribbon(data = rgn, aes(x = se, ymin =  hi99, ymax =  Inf,  fill = "Outside 99%")) +
  # Then 99%, 95%, 90% regions (widest to narrowest)
  geom_ribbon(data = rgn, aes(x = se, ymin = lo99, ymax = hi99, fill = "Within 99%")) +
  geom_ribbon(data = rgn, aes(x = se, ymin = lo95, ymax = hi95, fill = "Within 95%")) +
  geom_ribbon(data = rgn, aes(x = se, ymin = lo90, ymax = hi90, fill = "Within 90%")) +
  # 95% pseudo-CI triangle outline
  geom_line(aes(x = se_seq, y = b0 + z95*se_seq), linewidth = 0.6, linetype = 2) +
  geom_line(aes(x = se_seq, y = b0 - z95*se_seq), linewidth = 0.6, linetype = 2) +
  # Study points
  geom_point(data = dat, aes(x = sei, y = yi),
             shape = 21, size = 2.8, stroke = 0.4,
             color = "black", fill = "steelblue", alpha = 0.8) +
  # Pooled estimate
  geom_hline(yintercept = b0, linetype = "dashed", color = "red") +
  # Axes: SE vertical (reversed), effect size horizontal
  coord_flip() +
  scale_x_reverse(name = "Standard Error") +
  scale_y_continuous(name = "Effect size (Cohen's d)") +
  # Legend with distinct fills and controlled order
  scale_fill_manual(
    values = c("Within 90%"  = "grey70",
               "Within 95%"  = "grey80",
               "Within 99%"  = "grey90",
               "Outside 99%" = "white"),
    breaks = c("Within 90%", "Within 95%", "Within 99%", "Outside 99%"),
    name = "Pseudo-CI region"
  ) +
  guides(fill = guide_legend(
    override.aes = list(
      colour = c("black","black","black","black"),  # give all boxes borders
      size   = 0.5                                  # thin border
    )
  )) +
  theme_test(base_size = 13) +
  theme(panel.grid.minor = element_blank(),
        legend.position = "bottom")

print(p)

ggplot2::ggsave("output/figures/meta_funnel.pdf", p, width = 7, height = 6)


# Egger's regression tests ----------------------------------------------------

# Convenience columns
df <- df %>% mutate(
  sei   = sqrt(var_d),
  prec  = 1 / sei,
  SND   = d / sei          # standard normal deviate (used in some Egger variants)
)

# Small helper to extract coef, SE, p by term name
grab <- function(fit, term) {
  i <- which(names(coef(fit)) == term)
  c(beta = unname(coef(fit)[i]),
    se   = unname(fit$se[i]),
    p    = unname(fit$pval[i]))
}

# --- 1) Unilevel (ML) Egger tests ---
## (a) Slope on SE (meta-regression with ML)
m_uni_se <- rma(yi = d, vi = var_d, mods = ~ sei, data = df, method = "ML")
eg_unilevel_se <- grab(m_uni_se, "sei")  # beta, se, p

## (b) SND ~ precision (classic Egger; intercept is the asymmetry test)
eg_ols <- lm(SND ~ prec, data = df)      # unweighted OLS version
co_int <- coef(summary(eg_ols))["(Intercept)", ]
eg_unilevel_sndprec <- c(beta = unname(co_int["Estimate"]),
                         se   = unname(co_int["Std. Error"]),
                         p    = unname(co_int["Pr(>|t|)"]))


# --- 2) Multilevel (ML) Egger tests (rma.mv) ---
## (a) yi ~ SE
m_mv_se <- rma.mv(d, var_d, mods = ~ sei,
                  random = ~ 1 | ID_R/ID_ES_Unique,
                  data = df, method = "ML")
eg_ml_se <- grab(m_mv_se, "sei")

## (b) yi ~ precision
m_mv_prec <- rma.mv(d, var_d, mods = ~ prec,
                    random = ~ 1 | ID_R/ID_ES_Unique,
                    data = df, method = "ML")
eg_ml_prec <- grab(m_mv_prec, "prec")


# 3) --- Cluster-robust (CR2) p-values for multilevel ---
eg_ml_se_cr  <- c(beta = NA, se = NA, p = NA)
eg_ml_prec_cr<- c(beta = NA, se = NA, p = NA)

if (requireNamespace("clubSandwich", quietly = TRUE)) {
  library(clubSandwich)
  cr_se   <- coef_test(m_mv_se,   vcov = "CR2", cluster = df$ID_R, test = "Satterthwaite")
  cr_prec <- coef_test(m_mv_prec, vcov = "CR2", cluster = df$ID_R, test = "Satterthwaite")
  # Extract the row for sei / prec
  r1 <- cr_se[rownames(cr_se) == "sei", , drop = FALSE]
  r2 <- cr_prec[rownames(cr_prec) == "prec", , drop = FALSE]
  eg_ml_se_cr   <- c(beta = unname(r1[1,"beta"]), se = unname(r1[1,"SE"]),    p = unname(r1[1,"p_Satt"]))
  eg_ml_prec_cr <- c(beta = unname(r2[1,"beta"]), se = unname(r2[1,"SE"]),    p = unname(r2[1,"p_Satt"]))
}


# --- 4) Tidy results table ---
eg_tab <- data.frame(
  Model = c("Unilevel (ML), slope on SE",
            "Unilevel (LM), SND ~ precision (intercept)",
            "Multilevel (ML), slope on SE",
            "Multilevel (ML), slope on precision",
            "Multilevel (ML+CR2), slope on SE",
            "Multilevel (ML+CR2), slope on precision"),
  Coef  = c(eg_unilevel_se["beta"],
            eg_unilevel_sndprec["beta"],
            eg_ml_se["beta"],
            eg_ml_prec["beta"],
            eg_ml_se_cr["beta"],
            eg_ml_prec_cr["beta"]),
  SE    = c(eg_unilevel_se["se"],
            eg_unilevel_sndprec["se"],
            eg_ml_se["se"],
            eg_ml_prec["se"],
            eg_ml_se_cr["se"],
            eg_ml_prec_cr["se"]),
  p     = c(eg_unilevel_se["p"],
            eg_unilevel_sndprec["p"],
            eg_ml_se["p"],
            eg_ml_prec["p"],
            eg_ml_se_cr["p"],
            eg_ml_prec_cr["p"])
)

print(eg_tab)
write.csv(eg_tab, "output/tables/pubbias_eggers_table.csv", row.names = FALSE)


# Create xtable with a caption + label
eg_tab_tex <- xtable(eg_tab,
                     caption = "Egger's regression tests for small-study effects.",
                     label   = "tab:pubbias_eggers",
                     align   = c("l","l","c","c","c") )

# Print to .tex file
print(eg_tab_tex,
      include.rownames = FALSE,
      file = "output/tables/pubbias_eggers_table.tex",
      table.placement = "H",     # force placement here
      sanitize.text.function = identity)


# Impact of publication status -----------------------------------------------

# --- publication status dummy ---
df$Published <- ifelse(df$TypeReport==1, 1, 0) # 1 = published; 0 = unpublished

# --- various random-effects models ---

## Get estimates for...
# ... Model 0
m0.pub <- rma.mv(d, var_d, 
                 mods = ~ Published, 
                 random = ~ 1 | ID_RS/ID_ES_Unique,
                 data = df)
m0.pub

# ... Model 1
m1.pub <- rma.mv(d, var_d, 
                 mods = ~ StudyYear*Published, # StudyYear as moderator
                 random = ~ 1 | ID_R/ID_ES_Unique, # three-level model
                 data = df)
m1.pub

# ... Model 2
m2.pub <- rma.mv(d, var_d, 
             mods = ~ StudyYear*Published + # StudyYear as moderator +
               CasualtiesLess10 + Casualties10to100 + Casualties100plus +
               Exp + NatExp +
               ProbSample +
               Outgroup + Rally +
               US,
             random = ~ 1 | ID_R/ID_ES_Unique, # three-level model
             data = df)
m2.pub

# --- print table ----
models <- list(
  "Baseline" = m0.pub,
  "Model 1" = m1.pub,
  "Model 2" = m2.pub
)
cm <- c("StudyYear"     = "Year of study",
        "Published"          = "Publication dummy",
        "StudyYear:Published" = "Year of study * Published",
        "intercept"     = "Constant")
modelsummary(models, 
             coef_map = cm,
             stars = c("+" = .1, "*" = .05, "**" = .01, "***" = .001),
             fmt = 3,
             gof_omit = 'IC|Log|Adj|AIC|BIC|RMSE',
             escape = FALSE,
             output = "latex_tabular")
